Purpose:
Runs survival analysis models using splicing cluster assignment and 1) single exon splicing burden index (SBI) or 2) KEGG Spliceosome GSVA scores as a predictor
Uses a wrapper function (survival_analysis) from utils
folder.
Load packages, set directory paths and call setup script
library(tidyverse)
library(survival)
library(ggpubr)
library(ggplot2)
library(patchwork)
root_dir <- rprojroot::find_root(rprojroot::has_dir(".git"))
data_dir <- file.path(root_dir, "data")
analysis_dir <- file.path(root_dir, "analyses", "survival")
input_dir <- file.path(analysis_dir, "results")
results_dir <- file.path(analysis_dir, "results")
plot_dir <- file.path(analysis_dir, "plots")
# If the input and results directories do not exist, create it
if (!dir.exists(results_dir)) {
dir.create(results_dir, recursive = TRUE)
}
source(file.path(analysis_dir, "util", "survival_models.R"))
Set metadata and cluster assignment file paths
metadata_file <- file.path(input_dir, "splicing_indices_with_survival.tsv")
cluster_file <- file.path(root_dir, "analyses",
"sample-psi-clustering", "results",
"sample-cluster-metadata-top-5000-events-stranded.tsv")
kegg_scores_stranded_file <- file.path(root_dir, "analyses",
"sample-psi-clustering", "results",
"gsva_output_stranded.tsv")
Wrangle data Add cluster assignment and spliceosome gsva scores to
metadata and define column lgg_group (LGG or
non_LGG)
metadata <- read_tsv(metadata_file)
Rows: 1312 Columns: 23── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
Delimiter: "\t"
chr (9): Kids_First_Biospecimen_ID, Histology, Kids_First_Participant_ID, molecular_subtype, extent_of_tumor_resection, EFS_event_type, OS_status, EFS_status...
dbl (14): Total, AS_neg, AS_pos, AS_total, SI_A3SS, SI_A5SS, SI_RI, SI_SE, EFS_days, OS_days, age_at_diagnosis_days, age_at_diagnosis, OS_years, EFS_years
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
clusters <- read_tsv(cluster_file) %>%
dplyr::rename(Kids_First_Biospecimen_ID = sample_id)
Rows: 752 Columns: 8── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
Delimiter: "\t"
chr (6): sample_id, plot_group, plot_group_hex, RNA_library, molecular_subtype, plot_group_n
dbl (2): cluster, group_n
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
gsva_scores <- read_tsv(kegg_scores_stranded_file) %>%
dplyr::filter(geneset == "KEGG_SPLICEOSOME") %>%
dplyr::rename(spliceosome_gsva_score = score)
Rows: 23312 Columns: 3── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
Delimiter: "\t"
chr (2): sample_id, geneset
dbl (1): score
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# how many clusters?
n_clust <- length(unique(clusters$cluster))
metadata <- metadata %>%
right_join(clusters %>% dplyr::select(Kids_First_Biospecimen_ID,
cluster)) %>%
left_join(gsva_scores %>% dplyr::select(sample_id,
spliceosome_gsva_score),
by = c("Kids_First_Biospecimen_ID" = "sample_id")) %>%
dplyr::mutate(cluster = glue::glue("Cluster {cluster}")) %>%
dplyr::mutate(cluster = fct_relevel(cluster,
paste0("Cluster ", 1:n_clust))) %>%
dplyr::mutate(lgg_group = case_when(
Histology == "Low-grade glioma" ~ "LGG",
TRUE ~ "non-LGG"
)) %>%
dplyr::mutate(SI_SE = SI_SE * 10)
Joining with `by = join_by(Kids_First_Biospecimen_ID)`
Generate coxph models including extent of tumor resection, lgg group, and cluster assignment and SBI as covariates
add_model_os <- fit_save_model(metadata[!metadata$extent_of_tumor_resection %in% c("Not Reported", "Unavailable"),],
terms = "extent_of_tumor_resection+lgg_group+cluster+age_at_diagnosis_days+SI_SE",
file.path(results_dir, "cox_OS_additive_terms_resection_lgg_group_cluster_SBI.RDS"),
"multivariate",
years_col = "OS_years",
status_col = "OS_status")
forest_os <- plotForest(readRDS(file.path(results_dir, "cox_OS_additive_terms_resection_lgg_group_cluster_SBI.RDS")))
Warning: Removed 3 rows containing missing values or values outside the scale range (`geom_errorbarh()`).Warning: Removed 3 rows containing missing values or values outside the scale range (`geom_text()`).
forest_os
ggsave(file.path(plot_dir, "forest_add_OS_resection_lgg_group_cluster_assignment_SBI.pdf"),
forest_os,
width = 10, height = 6, units = "in",
device = "pdf")
add_model_efs <- fit_save_model(metadata[!metadata$extent_of_tumor_resection %in% c("Not Reported", "Unavailable"),],
terms = "extent_of_tumor_resection+lgg_group+cluster+age_at_diagnosis_days+SI_SE",
file.path(results_dir, "cox_EFS_additive_terms_resection_lgg_group_cluster_SBI.RDS"),
"multivariate",
years_col = "EFS_years",
status_col = "EFS_status")
forest_efs <- plotForest(readRDS(file.path(results_dir, "cox_EFS_additive_terms_resection_lgg_group_cluster_SBI.RDS")))
Warning: Removed 3 rows containing missing values or values outside the scale range (`geom_errorbarh()`).Warning: Removed 3 rows containing missing values or values outside the scale range (`geom_text()`).
forest_efs
ggsave(file.path(plot_dir, "forest_add_EFS_resection_lgg_group_cluster_assignment_SBI.pdf"),
forest_efs,
width = 10, height = 6, units = "in",
device = "pdf")
repeat analysis, replacing SBI with KEGG spliceosome gsva score
add_model_os <- fit_save_model(metadata[!metadata$extent_of_tumor_resection %in% c("Not Reported", "Unavailable"),],
terms = "extent_of_tumor_resection+lgg_group+cluster+age_at_diagnosis_days+spliceosome_gsva_score",
file.path(results_dir, "cox_OS_additive_terms_resection_lgg_group_cluster_spliceosome_score.RDS"),
"multivariate",
years_col = "OS_years",
status_col = "OS_status")
forest_os <- plotForest(readRDS(file.path(results_dir, "cox_OS_additive_terms_resection_lgg_group_cluster_spliceosome_score.RDS")))
Warning: Removed 3 rows containing missing values or values outside the scale range (`geom_errorbarh()`).Warning: Removed 3 rows containing missing values or values outside the scale range (`geom_text()`).
forest_os
ggsave(file.path(plot_dir, "forest_add_OS_resection_lgg_group_cluster_assignment_spliceosome_score.pdf"),
forest_os,
width = 10, height = 6, units = "in",
device = "pdf")
models <- c("spliceosome_gsva_score", "SI_SE")
for (each in models) {
int_model_efs <- fit_save_model(metadata[!metadata$extent_of_tumor_resection %in% c("Not Reported", "Unavailable"),],
terms = paste0("extent_of_tumor_resection+cluster*", each, "+age_at_diagnosis_days"),
file.path(results_dir, paste0("cox_EFS_interaction_terms_resection_lgg_group_cluster_", each, ".RDS")),
"multivariate",
years_col = "EFS_years",
status_col = "EFS_status")
int_forest_efs <- plotForest(readRDS(file.path(results_dir, paste0("cox_EFS_interaction_terms_resection_lgg_group_cluster_", each, ".RDS"))))
int_forest_efs
ggsave(file.path(plot_dir, paste0("forest_int_EFS_resection_lgg_group_cluster_assignment_", each, ".pdf")),
int_forest_efs,
width = 10, height = 6, units = "in",
device = "pdf")
int_model_os <- fit_save_model(metadata[!metadata$extent_of_tumor_resection %in% c("Not Reported", "Unavailable"),],
terms = paste0("extent_of_tumor_resection+cluster*", each, "+age_at_diagnosis_days"),
file.path(results_dir, paste0("cox_OS_interaction_terms_resection_lgg_group_cluster_", each, ".RDS")),
"multivariate",
years_col = "OS_years",
status_col = "OS_status")
int_forest_os <- plotForest(readRDS(file.path(results_dir, paste0("cox_OS_interaction_terms_resection_lgg_group_cluster_", each, ".RDS"))))
int_forest_os
ggsave(file.path(plot_dir, paste0("forest_int_OS_resection_lgg_group_cluster_assignment_", each, ".pdf")),
int_forest_os,
width = 10, height = 6, units = "in",
device = "pdf")
}
Warning: Removed 2 rows containing missing values or values outside the scale range (`geom_errorbarh()`).Warning: Removed 2 rows containing missing values or values outside the scale range (`geom_text()`).
add_model_efs <- fit_save_model(metadata[!metadata$extent_of_tumor_resection %in% c("Not Reported", "Unavailable"),],
terms = "extent_of_tumor_resection+lgg_group+cluster+age_at_diagnosis_days+spliceosome_gsva_score",
file.path(results_dir, "cox_EFS_additive_terms_resection_lgg_group_cluster_spliceosome_score.RDS"),
"multivariate",
years_col = "EFS_years",
status_col = "EFS_status")
forest_efs <- plotForest(readRDS(file.path(results_dir, "cox_EFS_additive_terms_resection_lgg_group_cluster_spliceosome_score.RDS")))
Warning: Removed 3 rows containing missing values or values outside the scale range (`geom_errorbarh()`).Warning: Removed 3 rows containing missing values or values outside the scale range (`geom_text()`).
forest_efs
ggsave(file.path(plot_dir, "forest_add_EFS_resection_lgg_group_cluster_assignment_spliceosome_score.pdf"),
forest_efs,
width = 10, height = 6, units = "in",
device = "pdf")
Subset metadata for LGG, and only include clusters with
>= 10 samples
lgg <- metadata %>%
dplyr::filter(Histology == "Low-grade glioma") %>%
dplyr::mutate(cluster = factor(cluster)) %>%
dplyr::mutate(mol_sub_group = fct_relevel(mol_sub_group, c("Wildtype", "BRAF V600E", "BRAF fusion",
"Other alteration", "SEGA"
)))
retain_clusters_lgg <- lgg %>%
dplyr::count(cluster) %>%
filter(n >= 10) %>%
pull(cluster)
lgg <- lgg %>%
filter(cluster %in% retain_clusters_lgg) %>%
dplyr::mutate(cluster = factor(cluster))
Generate coxph models including covariates
extent_of_tumor_resection, mol_sub_group,
cluster, and SI_SE and plot
# identify LGG clusters
lgg_clusters <- metadata %>%
filter(lgg_group == "LGG") %>%
mutate(cluster = as.integer(gsub("cluster", "", cluster))) %>%
pull(cluster) %>%
sort() %>%
unique()
Warning: There was 1 warning in `mutate()`.
ℹ In argument: `cluster = as.integer(gsub("cluster", "", cluster))`.
Caused by warning:
! NAs introduced by coercion
add_model_lgg_efs <- fit_save_model(lgg[!lgg$cluster %in% lgg_clusters & !lgg$extent_of_tumor_resection %in% c("Not Reported", "Unavailable"),],
terms = "extent_of_tumor_resection+mol_sub_group+cluster+age_at_diagnosis_days+SI_SE",
file.path(results_dir, "cox_lgg_EFS_additive_terms_resection_subtype_cluster_SBI.RDS"),
"multivariate",
years_col = "EFS_years",
status_col = "EFS_status")
Warning: Loglik converged before variable 6,8 ; coefficient may be infinite.
forest_lgg_efs <- plotForest(readRDS(file.path(results_dir, "cox_lgg_EFS_additive_terms_resection_subtype_cluster_SBI.RDS")))
Warning: Removed 3 rows containing missing values or values outside the scale range (`geom_errorbarh()`).Warning: Removed 3 rows containing missing values or values outside the scale range (`geom_text()`).
forest_lgg_efs
ggsave(file.path(plot_dir, "forest_add_EFS_LGG_resection_subtype_cluster_assignment_SBI.pdf"),
forest_lgg_efs,
width = 10, height = 6, units = "in",
device = "pdf")
repeat analysis replacing SI_SE with
spliceosome_gsva_score
add_model_lgg_efs <- fit_save_model(lgg[!lgg$cluster %in% lgg_clusters & !lgg$extent_of_tumor_resection %in% c("Not Reported", "Unavailable"),],
terms = "extent_of_tumor_resection+mol_sub_group+cluster+age_at_diagnosis_days+spliceosome_gsva_score",
file.path(results_dir, "cox_lgg_EFS_additive_terms_resection_subtype_cluster_spliceosome_score.RDS"),
"multivariate",
years_col = "EFS_years",
status_col = "EFS_status")
Warning: Loglik converged before variable 6,8 ; coefficient may be infinite.
forest_lgg_efs <- plotForest(readRDS(file.path(results_dir, "cox_lgg_EFS_additive_terms_resection_subtype_cluster_spliceosome_score.RDS")))
Warning: Removed 3 rows containing missing values or values outside the scale range (`geom_errorbarh()`).Warning: Removed 3 rows containing missing values or values outside the scale range (`geom_text()`).
forest_lgg_efs
ggsave(file.path(plot_dir, "forest_add_EFS_LGG_resection_subtype_cluster_assignment_spliceosome_score.pdf"),
forest_lgg_efs,
width = 10, height = 6, units = "in",
device = "pdf")
Subset metadata for HGG and retain cluster with n >=
10
hgg <- metadata %>%
dplyr::filter(Histology %in% c("Other high-grade glioma", "DIPG or DMG")) %>%
dplyr::mutate(cluster = factor(cluster)) %>%
dplyr::mutate(mol_sub_group = fct_relevel(mol_sub_group, c("HGG, H3 wildtype", "HGG, H3 wildtype, TP53",
"DMG, H3 K28", "DMG, H3 K28, TP53",
"DHG, H3 G35", "DHG, H3 G35, TP53",
"HGG, IDH, TP53", "HGG, PXA", "HGG, PXA, TP53",
"IHG, ALK-altered", "IHG, NTRK-altered",
"IHG, ROS1-altered"
))) %>%
dplyr::filter(!is.na(OS_days) | !is.na(EFS_days))
Warning: There was 1 warning in `dplyr::mutate()`.
ℹ In argument: `mol_sub_group = fct_relevel(...)`.
Caused by warning:
! 1 unknown level in `f`: HGG, PXA, TP53
retain_clusters_hgg <- hgg %>%
dplyr::count(cluster) %>%
filter(n >= 10) %>%
pull(cluster)
hgg <- hgg %>%
filter(cluster %in% retain_clusters_hgg) %>%
dplyr::mutate(cluster = factor(cluster)) %>%
dplyr::mutate(SI_group = case_when(
SI_SE > summary(SI_SE)["3rd Qu."] ~ "High SBI",
SI_SE < summary(SI_SE)["1st Qu."] ~ "Low SBI",
TRUE ~ NA_character_
)) %>%
dplyr::mutate(spliceosome_group = case_when(
spliceosome_gsva_score > summary(spliceosome_gsva_score)["3rd Qu."] ~ "Splice GSVA 4th Q",
spliceosome_gsva_score > summary(spliceosome_gsva_score)["Median"] ~ "Splice GSVA 3rd Q",
spliceosome_gsva_score > summary(spliceosome_gsva_score)["1st Qu."] ~ "Splice GSVA 2nd Q",
TRUE ~ "Splice GSVA 1st Q"
)) %>%
dplyr::mutate(SI_group = fct_relevel(SI_group,
c("High SBI", "Low SBI"))) %>%
dplyr::mutate(spliceosome_group = fct_relevel(spliceosome_group,
c("Splice GSVA 1st Q",
"Splice GSVA 2nd Q",
"Splice GSVA 3rd Q",
"Splice GSVA 4th Q")))
Generate HGG KM models with spliceosome_group as
covariate
# Generate kaplan meier survival models for OS and EFS, and save outputs
hgg_kap_os <- survival_analysis(
metadata = hgg %>% dplyr::filter(!is.na(spliceosome_group)),
ind_var = "spliceosome_group",
test = "kap.meier",
metadata_sample_col = "Kids_First_Biospecimen_ID",
days_col = "OS_days",
status_col = "OS_status"
)
Testing model: survival::Surv(OS_days, OS_status) ~ spliceosome_group with kap.meier
readr::write_rds(hgg_kap_os,
file.path(results_dir, "logrank_hgg_OS_splice_group.RDS"))
hgg_kap_efs <- survival_analysis(
metadata = hgg %>% dplyr::filter(!is.na(spliceosome_group)),
ind_var = "spliceosome_group",
test = "kap.meier",
metadata_sample_col = "Kids_First_Biospecimen_ID",
days_col = "EFS_days",
status_col = "EFS_status"
)
Testing model: survival::Surv(EFS_days, EFS_status) ~ spliceosome_group with kap.meier
readr::write_rds(hgg_kap_efs,
file.path(results_dir, "logrank_hgg_EFS_splice_group.RDS"))
Generate KM plots
km_hgg_os_plot <- plotKM(model = hgg_kap_os,
variable = "spliceosome_group",
combined = F,
title = "HGG, overall survival",
p_pos = "topright")
Warning: No shared levels found between `names(values)` of the manual scale and the data's fill values.
ggsave(file.path(plot_dir, "km_hgg_OS_spliceosome_score.pdf"),
km_hgg_os_plot,
width = 9, height = 5, units = "in",
device = "pdf")
km_hgg_efs_plot <- plotKM(model = hgg_kap_efs,
variable = "spliceosome_group",
combined = F,
title = "HGG, event-free survival",
p_pos = "topright")
ggsave(file.path(plot_dir, "km_hgg_EFS_spliceosome_score.pdf"),
km_hgg_efs_plot,
width = 9, height = 5, units = "in",
device = "pdf")
Generate coxph models for HGG including covariates
mol_sub_group cluster, and SI_SE,
and plot
add_model_hgg_os <- fit_save_model(hgg,
terms = "mol_sub_group+cluster+age_at_diagnosis_days+SI_SE",
file.path(results_dir, "cox_hgg_OS_additive_terms_subtype_cluster_SBI.RDS"),
"multivariate",
years_col = "OS_years",
status_col = "OS_status")
Warning: Loglik converged before variable 9 ; coefficient may be infinite.
forest_hgg_os <- plotForest(readRDS(file.path(results_dir, "cox_hgg_OS_additive_terms_subtype_cluster_SBI.RDS")))
Warning: Removed 2 rows containing missing values or values outside the scale range (`geom_errorbarh()`).Warning: Removed 2 rows containing missing values or values outside the scale range (`geom_text()`).
forest_hgg_os
ggsave(file.path(plot_dir, "forest_add_OS_HGG_subtype_cluster_assignment_SBI.pdf"),
forest_hgg_os,
width = 9, height = 5, units = "in",
device = "pdf")
add_model_hgg_efs <- fit_save_model(hgg,
terms = "mol_sub_group+cluster+age_at_diagnosis_days+SI_SE",
file.path(results_dir, "cox_hgg_EFS_additive_terms_subtype_cluster_SBI.RDS"),
"multivariate",
years_col = "EFS_years",
status_col = "EFS_status")
forest_hgg_efs <- plotForest(readRDS(file.path(results_dir, "cox_hgg_EFS_additive_terms_subtype_cluster_SBI.RDS")))
Warning: Removed 2 rows containing missing values or values outside the scale range (`geom_errorbarh()`).Warning: Removed 2 rows containing missing values or values outside the scale range (`geom_text()`).
ggsave(file.path(plot_dir, "forest_add_EFS_HGG_subtype_cluster_assignment_SBI.pdf"),
forest_hgg_efs,
width = 9, height = 5, units = "in",
device = "pdf")
Repeat analysis replacing SI_SE with
spliceosome_gsva_score
add_model_hgg_os <- fit_save_model(hgg,
terms = "mol_sub_group+cluster+age_at_diagnosis_days+spliceosome_gsva_score",
file.path(results_dir, "cox_hgg_OS_additive_terms_subtype_cluster_spliceosome_score.RDS"),
"multivariate",
years_col = "OS_years",
status_col = "OS_status")
Warning: Loglik converged before variable 9 ; coefficient may be infinite.
forest_hgg_os <- plotForest(readRDS(file.path(results_dir, "cox_hgg_OS_additive_terms_subtype_cluster_spliceosome_score.RDS")))
Warning: Removed 2 rows containing missing values or values outside the scale range (`geom_errorbarh()`).Warning: Removed 2 rows containing missing values or values outside the scale range (`geom_text()`).
forest_hgg_os
ggsave(file.path(plot_dir, "forest_add_OS_HGG_subtype_cluster_assignment_spliceosome_score.pdf"),
forest_hgg_os,
width = 9, height = 5, units = "in",
device = "pdf")
add_model_hgg_efs <- fit_save_model(hgg,
terms = "mol_sub_group+cluster+age_at_diagnosis_days+spliceosome_gsva_score",
file.path(results_dir, "cox_hgg_EFS_additive_terms_subtype_cluster_spliceosome_score.RDS"),
"multivariate",
years_col = "EFS_years",
status_col = "EFS_status")
forest_hgg_efs <- plotForest(readRDS(file.path(results_dir, "cox_hgg_EFS_additive_terms_subtype_cluster_spliceosome_score.RDS")))
Warning: Removed 2 rows containing missing values or values outside the scale range (`geom_errorbarh()`).Warning: Removed 2 rows containing missing values or values outside the scale range (`geom_text()`).
ggsave(file.path(plot_dir, "forest_add_EFS_HGG_subtype_cluster_assignment_spliceosome_score.pdf"),
forest_hgg_efs,
width = 9, height = 5, units = "in",
device = "pdf")
Filter for cluster 6
cluster6_df <- metadata %>%
dplyr::filter(cluster == "Cluster 6",
!is.na(EFS_days)) %>%
dplyr::mutate(SI_group = case_when(
SI_SE > summary(SI_SE)["3rd Qu."] ~ "High SBI",
SI_SE < summary(SI_SE)["1st Qu."] ~ "Low SBI",
TRUE ~ NA_character_
)) %>%
dplyr::mutate(spliceosome_group = case_when(
spliceosome_gsva_score > summary(spliceosome_gsva_score)["3rd Qu."] ~ "Splice GSVA 4th Q",
spliceosome_gsva_score > summary(spliceosome_gsva_score)["Median"] ~ "Splice GSVA 3rd Q",
spliceosome_gsva_score > summary(spliceosome_gsva_score)["1st Qu."] ~ "Splice GSVA 2nd Q",
TRUE ~ "Splice GSVA 1st Q"
)) %>%
dplyr::mutate(SI_group = fct_relevel(SI_group,
c("High SBI", "Low SBI"))) %>%
dplyr::mutate(spliceosome_group = fct_relevel(spliceosome_group,
c("Splice GSVA 1st Q",
"Splice GSVA 2nd Q",
"Splice GSVA 3rd Q",
"Splice GSVA 4th Q")))
Generate KM models with SI_group as covariate
# Generate kaplan meier survival models for OS and EFS, and save outputs
c6_si_kap_os <- survival_analysis(
metadata = cluster6_df %>% dplyr::filter(!is.na(SI_group)),
ind_var = "SI_group",
test = "kap.meier",
metadata_sample_col = "Kids_First_Biospecimen_ID",
days_col = "OS_days",
status_col = "OS_status"
)
Testing model: survival::Surv(OS_days, OS_status) ~ SI_group with kap.meier
readr::write_rds(c6_si_kap_os,
file.path(results_dir, "logrank_cluster6_OS_SBI.RDS"))
c6_si_kap_efs <- survival_analysis(
metadata = cluster6_df %>% dplyr::filter(!is.na(SI_group)),
ind_var = "SI_group",
test = "kap.meier",
metadata_sample_col = "Kids_First_Biospecimen_ID",
days_col = "EFS_days",
status_col = "EFS_status"
)
Testing model: survival::Surv(EFS_days, EFS_status) ~ SI_group with kap.meier
readr::write_rds(c6_si_kap_efs,
file.path(results_dir, "logrank_cluster6_EFS_SBI.RDS"))
Generate Cluster 6 KM SI_group plots
km_c6_si_os_plot <- plotKM(model = c6_si_kap_os,
variable = "SI_group",
combined = F,
title = "Cluster 6, overall survival",
p_pos = "topright")
Warning: No shared levels found between `names(values)` of the manual scale and the data's fill values.
ggsave(file.path(plot_dir, "km_cluster6_OS_sbi_group.pdf"),
km_c6_si_os_plot,
width = 8, height = 5, units = "in",
device = "pdf")
km_c6_si_efs_plot <- plotKM(model = c6_si_kap_efs,
variable = "SI_group",
combined = F,
title = "Cluster 6, event-free survival",
p_pos = "topright")
ggsave(file.path(plot_dir, "km_cluster6_EFS_sbi_group.pdf"),
km_c6_si_efs_plot,
width = 8, height = 5, units = "in",
device = "pdf")
Generate KM models with spliceosome_group as
covariate
# Generate kaplan meier survival models for OS and EFS, and save outputs
c6_splice_kap_os <- survival_analysis(
metadata = cluster6_df %>%
dplyr::filter(spliceosome_group %in% c("Splice GSVA 4th Q", "Splice GSVA 1st Q")) %>%
dplyr::mutate(spliceosome_group = factor(spliceosome_group,
levels = c("Splice GSVA 1st Q", "Splice GSVA 4th Q"))),
ind_var = "spliceosome_group",
test = "kap.meier",
metadata_sample_col = "Kids_First_Biospecimen_ID",
days_col = "OS_days",
status_col = "OS_status"
)
Testing model: survival::Surv(OS_days, OS_status) ~ spliceosome_group with kap.meier
readr::write_rds(c6_splice_kap_os,
file.path(results_dir, "logrank_cluster6_OS_splice_group.RDS"))
c6_splice_kap_efs <- survival_analysis(
metadata = cluster6_df %>%
dplyr::filter(spliceosome_group %in% c("Splice GSVA 4th Q", "Splice GSVA 1st Q")) %>%
dplyr::mutate(spliceosome_group = factor(spliceosome_group,
levels = c("Splice GSVA 1st Q", "Splice GSVA 4th Q"))),
ind_var = "spliceosome_group",
test = "kap.meier",
metadata_sample_col = "Kids_First_Biospecimen_ID",
days_col = "EFS_days",
status_col = "EFS_status"
)
Testing model: survival::Surv(EFS_days, EFS_status) ~ spliceosome_group with kap.meier
readr::write_rds(c6_splice_kap_efs,
file.path(results_dir, "logrank_cluster6_EFS_splice_group.RDS"))
Generate Cluster 6 KM spliceosome_group plots
km_c6_splice_os_plot <- plotKM(model = c6_splice_kap_os,
variable = "spliceosome_group",
combined = F,
title = "Cluster 6, overall survival",
p_pos = "topright")
Warning: No shared levels found between `names(values)` of the manual scale and the data's fill values.
ggsave(file.path(plot_dir, "km_cluster6_OS_splice_group.pdf"),
km_c6_splice_os_plot,
width = 9, height = 5, units = "in",
device = "pdf")
km_c6_splice_efs_plot <- plotKM(model = c6_splice_kap_efs,
variable = "spliceosome_group",
combined = F,
title = "Cluster 6, event-free survival",
p_pos = "topright")
ggsave(file.path(plot_dir, "km_cluster6_EFS_splice_group.pdf"),
km_c6_splice_efs_plot,
width = 9, height = 5, units = "in",
device = "pdf")
Assess EFS and OS by SBI or spliceosome GSVA score in multivariate models and generate forest plots
add_model_c6_efs <- fit_save_model(cluster6_df %>%
dplyr::filter(extent_of_tumor_resection != "Unavailable",
spliceosome_group %in% c("Splice GSVA 4th Q", "Splice GSVA 1st Q")) %>%
dplyr::mutate(Histology = fct_relevel(Histology,
c("Other high-grade glioma", "Atypical Teratoid Rhabdoid Tumor",
"DIPG or DMG", "Ependymoma", "Mesenchymal tumor",
"Other CNS embryonal tumor", "Low-grade glioma"))),
terms = "extent_of_tumor_resection+age_at_diagnosis_days+Histology+spliceosome_group",
file.path(results_dir, "cox_hgg_EFS_additive_terms_subtype_cluster_spliceosome_score.RDS"),
"multivariate",
years_col = "EFS_years",
status_col = "EFS_status")
Warning: There was 1 warning in `dplyr::mutate()`.
ℹ In argument: `Histology = fct_relevel(...)`.
Caused by warning:
! 2 unknown levels in `f`: Ependymoma and Low-grade glioma
forest_c6_spliceosome_efs <- plotForest(readRDS(file.path(results_dir, "cox_hgg_EFS_additive_terms_subtype_cluster_spliceosome_score.RDS")))
Warning: Removed 3 rows containing missing values or values outside the scale range (`geom_errorbarh()`).Warning: Removed 3 rows containing missing values or values outside the scale range (`geom_text()`).
ggsave(file.path(plot_dir, "forest_add_EFS_cluster6_histology_resection_spliceosome_group.pdf"),
forest_c6_spliceosome_efs,
width = 9, height = 4, units = "in",
device = "pdf")
add_model_c6_os <- fit_save_model(cluster6_df %>%
dplyr::filter(!extent_of_tumor_resection %in% c("Not Reported", "Unavailable")) %>%
dplyr::rename("SBI_SE" = SI_SE) %>%
dplyr::mutate(Histology = fct_relevel(Histology,
c("Other high-grade glioma", "Atypical Teratoid Rhabdoid Tumor",
"DIPG or DMG", "Ependymoma", "Mesenchymal tumor",
"Other CNS embryonal tumor", "Low-grade glioma"))),
terms = "extent_of_tumor_resection+age_at_diagnosis_days+Histology+SBI_SE",
file.path(results_dir, "cox_hgg_OS_additive_terms_subtype_cluster_si_group.RDS"),
"multivariate",
years_col = "OS_years",
status_col = "OS_status")
Warning: Loglik converged before variable 9 ; coefficient may be infinite.
forest_c6_si_os <- plotForest(readRDS(file.path(results_dir, "cox_hgg_OS_additive_terms_subtype_cluster_si_group.RDS")))
Warning: Removed 2 rows containing missing values or values outside the scale range (`geom_errorbarh()`).Warning: Removed 2 rows containing missing values or values outside the scale range (`geom_text()`).
ggsave(file.path(plot_dir, "forest_add_OS_cluster6_histology_resection_si.pdf"),
forest_c6_si_os,
width = 9, height = 4, units = "in",
device = "pdf")
Print session info
sessionInfo()
R version 4.4.2 (2024-10-31)
Platform: aarch64-apple-darwin20
Running under: macOS Sequoia 15.5
Matrix products: default
BLAS: /System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/libBLAS.dylib
LAPACK: /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRlapack.dylib; LAPACK version 3.12.0
locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
time zone: America/New_York
tzcode source: internal
attached base packages:
[1] grid stats4 stats graphics grDevices utils datasets methods base
other attached packages:
[1] gtools_3.9.5 survminer_0.5.0 patchwork_1.3.0 ggpubr_0.6.0 survival_3.7-0
[6] ggthemes_5.1.0 rprojroot_2.0.4 ggrepel_0.9.6 DESeq2_1.46.0 SummarizedExperiment_1.36.0
[11] Biobase_2.66.0 MatrixGenerics_1.18.1 matrixStats_1.5.0 GenomicRanges_1.58.0 GenomeInfoDb_1.42.3
[16] IRanges_2.40.1 S4Vectors_0.44.0 BiocGenerics_0.52.0 lubridate_1.9.4 forcats_1.0.0
[21] stringr_1.5.1 dplyr_1.1.4 purrr_1.0.2 readr_2.1.5 tidyr_1.3.1
[26] tibble_3.2.1 ggplot2_3.5.1 tidyverse_2.0.0
loaded via a namespace (and not attached):
[1] gridExtra_2.3 rlang_1.1.5 magrittr_2.0.3 compiler_4.4.2 systemfonts_1.2.1 vctrs_0.6.5
[7] pkgconfig_2.0.3 crayon_1.5.3 fastmap_1.2.0 backports_1.5.0 XVector_0.46.0 labeling_0.4.3
[13] KMsurv_0.1-5 utf8_1.2.4 rmarkdown_2.29 markdown_1.13 tzdb_0.4.0 UCSC.utils_1.2.0
[19] ragg_1.3.3 bit_4.5.0.1 xfun_0.50 cachem_1.1.0 zlibbioc_1.52.0 jsonlite_1.9.1
[25] DelayedArray_0.32.0 BiocParallel_1.40.0 broom_1.0.7 parallel_4.4.2 R6_2.6.1 bslib_0.9.0
[31] stringi_1.8.4 car_3.1-3 jquerylib_0.1.4 Rcpp_1.0.14 knitr_1.49 zoo_1.8-12
[37] Matrix_1.7-1 splines_4.4.2 timechange_0.3.0 tidyselect_1.2.1 rstudioapi_0.17.1 abind_1.4-8
[43] yaml_2.3.10 ggtext_0.1.2 codetools_0.2-20 lattice_0.22-6 withr_3.0.2 evaluate_1.0.3
[49] xml2_1.3.6 survMisc_0.5.6 pillar_1.10.1 colorblindr_0.1.0 carData_3.0-5 rsconnect_1.3.4
[55] generics_0.1.3 vroom_1.6.5 hms_1.1.3 commonmark_1.9.5 munsell_0.5.1 scales_1.3.0
[61] xtable_1.8-4 glue_1.8.0 tools_4.4.2 data.table_1.16.4 locfit_1.5-9.12 ggsignif_0.6.4
[67] cowplot_1.1.3.9000 colorspace_2.1-1 GenomeInfoDbData_1.2.13 Formula_1.2-5 cli_3.6.4 km.ci_0.5-6
[73] textshaping_1.0.0 S4Arrays_1.6.0 gtable_0.3.6 rstatix_0.7.2 sass_0.4.9 digest_0.6.37
[79] SparseArray_1.6.2 farver_2.1.2 htmltools_0.5.8.1 lifecycle_1.0.4 httr_1.4.7 gridtext_0.1.5
[85] bit64_4.6.0-1